1   /*
2    * dk.brics.automaton
3    * 
4    * Copyright (c) 2001-2009 Anders Moeller
5    * All rights reserved.
6    * 
7    * Redistribution and use in source and binary forms, with or without
8    * modification, are permitted provided that the following conditions
9    * are met:
10   * 1. Redistributions of source code must retain the above copyright
11   *    notice, this list of conditions and the following disclaimer.
12   * 2. Redistributions in binary form must reproduce the above copyright
13   *    notice, this list of conditions and the following disclaimer in the
14   *    documentation and/or other materials provided with the distribution.
15   * 3. The name of the author may not be used to endorse or promote products
16   *    derived from this software without specific prior written permission.
17   * 
18   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21   * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27   * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28   */
29  
30  package org.apache.lucene.util.automaton;
31  
32  import java.util.ArrayList;
33  import java.util.BitSet;
34  import java.util.HashSet;
35  import java.util.LinkedList;
36  
37  /**
38   * Operations for minimizing automata.
39   * 
40   * @lucene.experimental
41   */
42  final public class MinimizationOperations {
43    
44    private MinimizationOperations() {}
45  
46    /**
47     * Minimizes (and determinizes if not already deterministic) the given
48     * automaton using Hopcroft's algorighm.
49     * @param maxDeterminizedStates maximum number of states determinizing the
50     *  automaton can result in.  Set higher to allow more complex queries and
51     *  lower to prevent memory exhaustion.
52     */
53    public static Automaton minimize(Automaton a, int maxDeterminizedStates) {
54      if (a.getNumStates() == 0 || (a.isAccept(0) == false && a.getNumTransitions(0) == 0)) {
55        // Fastmatch for common case
56        return new Automaton();
57      }
58      a = Operations.determinize(a, maxDeterminizedStates);
59      //a.writeDot("adet");
60      if (a.getNumTransitions(0) == 1) {
61        Transition t = new Transition();
62        a.getTransition(0, 0, t);
63        if (t.dest == 0 && t.min == Character.MIN_CODE_POINT
64            && t.max == Character.MAX_CODE_POINT) {
65          // Accepts all strings
66          return a;
67        }
68      }
69      a = Operations.totalize(a);
70      //a.writeDot("atot");
71  
72      // initialize data structures
73      final int[] sigma = a.getStartPoints();
74      final int sigmaLen = sigma.length, statesLen = a.getNumStates();
75  
76      @SuppressWarnings({"rawtypes","unchecked"}) final ArrayList<Integer>[][] reverse =
77        (ArrayList<Integer>[][]) new ArrayList[statesLen][sigmaLen];
78      @SuppressWarnings({"rawtypes","unchecked"}) final HashSet<Integer>[] partition =
79        (HashSet<Integer>[]) new HashSet[statesLen];
80      @SuppressWarnings({"rawtypes","unchecked"}) final ArrayList<Integer>[] splitblock =
81        (ArrayList<Integer>[]) new ArrayList[statesLen];
82      final int[] block = new int[statesLen];
83      final StateList[][] active = new StateList[statesLen][sigmaLen];
84      final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
85      final LinkedList<IntPair> pending = new LinkedList<>();
86      final BitSet pending2 = new BitSet(sigmaLen*statesLen);
87      final BitSet split = new BitSet(statesLen), 
88        refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
89      for (int q = 0; q < statesLen; q++) {
90        splitblock[q] = new ArrayList<>();
91        partition[q] = new HashSet<>();
92        for (int x = 0; x < sigmaLen; x++) {
93          active[q][x] = new StateList();
94        }
95      }
96      // find initial partition and reverse edges
97      for (int q = 0; q < statesLen; q++) {
98        final int j = a.isAccept(q) ? 0 : 1;
99        partition[j].add(q);
100       block[q] = j;
101       for (int x = 0; x < sigmaLen; x++) {
102         final ArrayList<Integer>[] r = reverse[a.step(q, sigma[x])];
103         if (r[x] == null) {
104           r[x] = new ArrayList<>();
105         }
106         r[x].add(q);
107       }
108     }
109     // initialize active sets
110     for (int j = 0; j <= 1; j++) {
111       for (int x = 0; x < sigmaLen; x++) {
112         for (int q : partition[j]) {
113           if (reverse[q][x] != null) {
114             active2[q][x] = active[j][x].add(q);
115           }
116         }
117       }
118     }
119 
120     // initialize pending
121     for (int x = 0; x < sigmaLen; x++) {
122       final int j = (active[0][x].size <= active[1][x].size) ? 0 : 1;
123       pending.add(new IntPair(j, x));
124       pending2.set(x*statesLen + j);
125     }
126 
127     // process pending until fixed point
128     int k = 2;
129     //System.out.println("start min");
130     while (!pending.isEmpty()) {
131       //System.out.println("  cycle pending");
132       final IntPair ip = pending.removeFirst();
133       final int p = ip.n1;
134       final int x = ip.n2;
135       //System.out.println("    pop n1=" + ip.n1 + " n2=" + ip.n2);
136       pending2.clear(x*statesLen + p);
137       // find states that need to be split off their blocks
138       for (StateListNode m = active[p][x].first; m != null; m = m.next) {
139         final ArrayList<Integer> r = reverse[m.q][x];
140         if (r != null) {
141           for (int i : r) {
142             if (!split.get(i)) {
143               split.set(i);
144               final int j = block[i];
145               splitblock[j].add(i);
146               if (!refine2.get(j)) {
147                 refine2.set(j);
148                 refine.set(j);
149               }
150             }
151           }
152         }
153       }
154 
155       // refine blocks
156       for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
157         final ArrayList<Integer> sb = splitblock[j];
158         if (sb.size() < partition[j].size()) {
159           final HashSet<Integer> b1 = partition[j];
160           final HashSet<Integer> b2 = partition[k];
161           for (int s : sb) {
162             b1.remove(s);
163             b2.add(s);
164             block[s] = k;
165             for (int c = 0; c < sigmaLen; c++) {
166               final StateListNode sn = active2[s][c];
167               if (sn != null && sn.sl == active[j][c]) {
168                 sn.remove();
169                 active2[s][c] = active[k][c].add(s);
170               }
171             }
172           }
173           // update pending
174           for (int c = 0; c < sigmaLen; c++) {
175             final int aj = active[j][c].size,
176               ak = active[k][c].size,
177               ofs = c*statesLen;
178             if (!pending2.get(ofs + j) && 0 < aj && aj <= ak) {
179               pending2.set(ofs + j);
180               pending.add(new IntPair(j, c));
181             } else {
182               pending2.set(ofs + k);
183               pending.add(new IntPair(k, c));
184             }
185           }
186           k++;
187         }
188         refine2.clear(j);
189         for (int s : sb) {
190           split.clear(s);
191         }
192         sb.clear();
193       }
194       refine.clear();
195     }
196 
197     Automaton result = new Automaton();
198 
199     Transition t = new Transition();
200 
201     //System.out.println("  k=" + k);
202 
203     // make a new state for each equivalence class, set initial state
204     int[] stateMap = new int[statesLen];
205     int[] stateRep = new int[k];
206 
207     result.createState();
208 
209     //System.out.println("min: k=" + k);
210     for (int n = 0; n < k; n++) {
211       //System.out.println("    n=" + n);
212 
213       boolean isInitial = false;
214       for (int q : partition[n]) {
215         if (q == 0) {
216           isInitial = true;
217           //System.out.println("    isInitial!");
218           break;
219         }
220       }
221 
222       int newState;
223       if (isInitial) {
224         newState = 0;
225       } else {
226         newState = result.createState();
227       }
228 
229       //System.out.println("  newState=" + newState);
230 
231       for (int q : partition[n]) {
232         stateMap[q] = newState;
233         //System.out.println("      q=" + q + " isAccept?=" + a.isAccept(q));
234         result.setAccept(newState, a.isAccept(q));
235         stateRep[newState] = q;   // select representative
236       }
237     }
238 
239     // build transitions and set acceptance
240     for (int n = 0; n < k; n++) {
241       int numTransitions = a.initTransition(stateRep[n], t);
242       for(int i=0;i<numTransitions;i++) {
243         a.getNextTransition(t);
244         //System.out.println("  add trans");
245         result.addTransition(n, stateMap[t.dest], t.min, t.max);
246       }
247     }
248     result.finishState();
249     //System.out.println(result.getNumStates() + " states");
250 
251     return Operations.removeDeadStates(result);
252   }
253   
254   static final class IntPair {
255     
256     final int n1, n2;
257     
258     IntPair(int n1, int n2) {
259       this.n1 = n1;
260       this.n2 = n2;
261     }
262   }
263   
264   static final class StateList {
265     
266     int size;
267     
268     StateListNode first, last;
269     
270     StateListNode add(int q) {
271       return new StateListNode(q, this);
272     }
273   }
274   
275   static final class StateListNode {
276     
277     final int q;
278     
279     StateListNode next, prev;
280     
281     final StateList sl;
282     
283     StateListNode(int q, StateList sl) {
284       this.q = q;
285       this.sl = sl;
286       if (sl.size++ == 0) sl.first = sl.last = this;
287       else {
288         sl.last.next = this;
289         prev = sl.last;
290         sl.last = this;
291       }
292     }
293     
294     void remove() {
295       sl.size--;
296       if (sl.first == this) sl.first = next;
297       else prev.next = next;
298       if (sl.last == this) sl.last = prev;
299       else next.prev = prev;
300     }
301   }
302 }